library(tidyverse)
library(readxl)
library(ggforce)
library(concaveman)
library(knitr)
library(olsrr)
library(ranger)
library(Metrics)
library(mgcv)
library(caret)
set.seed(3630)
# Strike Zone GG Object
geom_zone <- function(top = 11/3, bottom = 3/2, linecolor = "black"){
geom_rect(xmin = -.7083, xmax = .7083, ymin = bottom, ymax = top,
alpha = 0, color = linecolor, linewidth = 0.75)
}
# c(0, 0, -.25, -.5, -.25))
# Home Plate GG Object
geom_plate <- function(pov = "pitcher"){
df <- case_when(
pov == "pitcher" ~
data.frame(x = c(-.7083, .7083, .7083 ,0, -.7083), y = c(0, 0, .25, .5, .25)),
pov == "catcher" ~
data.frame(x = c(-.7083, .7083, .7083 ,0, -.7083), y = c(0, 0, -.25, -.5, -.25))
)
g <- geom_polygon(data = df, aes(x = x, y = y), fill = "white", color = "black", linewidth = 1.25)
g
}
# Barrel Function
is.barrel <- function(LA, EV){
upper <- 1.11*EV - 78.89
lower <- -EV + 124
outcome <- (LA >= lower) & (LA <= upper) & (EV >= 98) & (LA >= 8) & (LA <= 50)
outcome <- replace_na(outcome, FALSE)
outcome
}
# Normal Name Changer
swap_names <- function(name) {
parts <- strsplit(name, ", ")[[1]]
if (length(parts) == 2) {
return(paste(rev(parts), collapse = " "))
} else {
return(name)
}
}
seasonal <- read_csv("CSVs/season_stats.csv")
pitchers <- read_csv("CSVs/pitcher_comps.csv")
arsenal <- read_csv("CSVs/arsenal.csv")
empty <- read_csv("CSVs/bases_empty.csv")
whiff <- pitchers %>%
mutate(whiff = description == "swinging_strike",
whiff = as.character(whiff)) %>%
filter(pitch_type != "NA",
pitch_type != "PO")
LHP <- read_csv("CSVs/lhp_pitches.csv") %>%
select(-...1) %>%
filter(!is.na(pitch_type)) %>%
mutate(pitch_type = str_replace(pitch_type, "CS", "CU"),
pitch_name = str_replace(pitch_name, "Slow Curve", "Curveball"),
pitch_type = str_replace(pitch_type, "KC", "CU"),
pitch_name = str_replace(pitch_name, "Knuckle Curve", "Curveball"))
whiff_l <- LHP %>%
mutate(whiff = description == "swinging_strike",
whiff = as.character(whiff)) %>%
filter(pitch_type != "NA",
pitch_type != "PO")
# Model Data (Pitch = Slider, Pitching Hand = Right)
model_data <- arsenal %>%
filter(pitch_type == "SL",
pitch_hand == "R") %>%
mutate(ovr_break = sqrt(pitcher_break_x^2 + pitcher_break_z^2))
# Simple Linear Regression
lm_simple <- lm(xwOBA ~
pitch_speed + spin_rate + pitcher_break_x + pitcher_break_z +
pitch_usage + ovr_break,
data = model_data)
# Overview of all model combinations
model_all <- ols_step_all_possible(lm_simple)
# Backwards Elimination
lm_simple %>% ols_step_backward_p(penter = 0.2)
##
##
## Stepwise Summary
## -----------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## -----------------------------------------------------------------------------
## 0 Full Model -879.334 -848.821 -1829.725 0.05444 0.03714
## 1 spin_rate -881.055 -854.356 -1831.498 0.05365 0.03927
## -----------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ----------------------------------------------------------------
## R 0.232 RMSE 0.064
## R-Squared 0.054 MSE 0.004
## Adj. R-Squared 0.039 Coef. Var 23.574
## Pred R-Squared 0.020 AIC -881.055
## MAE 0.049 SBC -854.356
## ----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.077 5 0.015 3.73 0.0027
## Residual 1.356 329 0.004
## Total 1.433 334
## -------------------------------------------------------------------
##
## Parameter Estimates
## --------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## --------------------------------------------------------------------------------------------
## (Intercept) 0.888 0.181 4.899 0.000 0.531 1.244
## pitch_speed -0.006 0.002 -0.259 -3.242 0.001 -0.009 -0.002
## pitcher_break_x 0.007 0.004 0.399 1.828 0.068 0.000 0.014
## pitcher_break_z 0.032 0.017 2.198 1.814 0.071 -0.003 0.066
## pitch_usage 0.000 0.000 -0.105 -1.922 0.055 -0.001 0.000
## ovr_break -0.035 0.018 -2.597 -1.962 0.051 -0.070 0.000
## --------------------------------------------------------------------------------------------
# Stepwise Selection
lm_simple %>% ols_step_both_p(prem = 0.15, pent = 0.15)
##
##
## Stepwise Summary
## ----------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ----------------------------------------------------------------------------------
## 0 Base Model -872.581 -864.953 -1823.341 0.00000 0.00000
## 1 pitch_usage (+) -875.048 -863.605 -1825.823 0.01324 0.01028
## 2 pitch_speed (+) -877.136 -861.879 -1827.881 0.02521 0.01934
## 3 ovr_break (+) -881.609 -862.538 -1832.217 0.04387 0.03520
## ----------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ----------------------------------------------------------------
## R 0.209 RMSE 0.064
## R-Squared 0.044 MSE 0.004
## Adj. R-Squared 0.035 Coef. Var 23.624
## Pred R-Squared 0.021 AIC -881.609
## MAE 0.050 SBC -862.538
## ----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.063 3 0.021 5.062 0.0019
## Residual 1.370 331 0.004
## Total 1.433 334
## -------------------------------------------------------------------
##
## Parameter Estimates
## ----------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## ----------------------------------------------------------------------------------------
## (Intercept) 0.861 0.176 4.887 0.000 0.514 1.207
## pitch_usage -0.001 0.000 -0.121 -2.249 0.025 -0.001 0.000
## pitch_speed -0.006 0.002 -0.246 -3.236 0.001 -0.009 -0.002
## ovr_break -0.003 0.001 -0.193 -2.541 0.012 -0.005 -0.001
## ----------------------------------------------------------------------------------------
# New Model
lm1 <- lm(xwOBA ~
ovr_break + pitch_usage + pitch_speed,
data = model_data)
# Interaction Linear Regression
lm_interact <- lm(xwOBA ~
pitch_speed + spin_rate + pitcher_break_x + pitcher_break_z + pitch_usage +
ovr_break +
pitch_speed*spin_rate + pitch_speed*pitch_usage + pitch_speed*ovr_break +
pitch_speed*pitcher_break_x + pitch_speed*pitcher_break_z +
spin_rate*ovr_break + spin_rate*pitcher_break_x + spin_rate*pitcher_break_z +
pitch_usage*ovr_break + pitch_usage*spin_rate,
data = model_data)
# model_interact_all <- ols_step_all_possible(lm_interact)
# Stepwise Selection
lm_interact %>% ols_step_both_p(pent = 0.15, prem = 0.05)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ---------------------------------------------------------------------------------------------------
## 0 Base Model -872.581 -864.953 -1826.673 0.00000 0.00000
## 1 pitch_speed:pitch_usage (+) -875.879 -864.437 -1831.752 0.01569 0.01273
## 2 pitcher_break_z (+) -874.227 -858.970 -1831.890 0.01671 0.01079
## 3 pitch_speed:pitch_usage (-) -870.698 -859.256 -1826.584 0.00035 -0.00265
## 4 pitch_speed (+) -877.840 -862.584 -1835.493 0.02726 0.02140
## 5 pitch_usage (+) -881.134 -862.063 -1840.555 0.04251 0.03383
## 6 pitch_usage:ovr_break (+) -882.750 -859.865 -1843.940 0.05279 0.04131
## 7 ovr_break (+) -882.634 -855.935 -1845.597 0.05810 0.04379
## 8 pitcher_break_z (-) -884.460 -861.575 -1845.642 0.05761 0.04619
## 9 spin_rate (+) -882.608 -855.909 -1845.571 0.05803 0.04371
## 10 ovr_break (-) -874.597 -851.713 -1835.823 0.02946 0.01769
## 11 pitch_speed:ovr_break (+) -882.165 -855.466 -1845.130 0.05678 0.04245
## 12 pitch_usage:ovr_break (-) -879.449 -856.564 -1840.653 0.04341 0.03181
## 13 pitcher_break_x (+) -877.639 -850.940 -1840.627 0.04395 0.02942
## 14 spin_rate (-) -879.454 -856.569 -1840.658 0.04342 0.03183
## 15 pitch_speed:pitcher_break_z (+) -880.623 -853.924 -1843.596 0.05243 0.03803
## ---------------------------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ----------------------------------------------------------------
## R 0.229 RMSE 0.064
## R-Squared 0.052 MSE 0.004
## Adj. R-Squared 0.038 Coef. Var 23.589
## Pred R-Squared 0.019 AIC -880.623
## MAE 0.049 SBC -853.924
## ----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.075 5 0.015 3.641 0.0032
## Residual 1.358 329 0.004
## Total 1.433 334
## -------------------------------------------------------------------
##
## Parameter Estimates
## --------------------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## --------------------------------------------------------------------------------------------------------
## (Intercept) 0.729 0.155 4.705 0.000 0.424 1.034
## pitch_speed -0.004 0.002 -0.179 -2.577 0.010 -0.007 -0.001
## pitch_usage -0.001 0.000 -0.106 -1.946 0.053 -0.001 0.000
## pitcher_break_x 0.007 0.004 0.404 1.778 0.076 -0.001 0.014
## pitch_speed:ovr_break 0.000 0.000 -2.250 -1.904 0.058 -0.001 0.000
## pitch_speed:pitcher_break_z 0.000 0.000 1.935 1.768 0.078 0.000 0.001
## --------------------------------------------------------------------------------------------------------
# Output removes ALL interactions for p < 0.05
# Keeps same as simple LM pitcher_break_z + pitch_speed + pitch_usage
lm_interact %>% ols_step_both_p(pent = 0.15, prem = 0.10)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ---------------------------------------------------------------------------------------------------
## 0 Base Model -872.581 -864.953 -1826.673 0.00000 0.00000
## 1 pitch_speed:pitch_usage (+) -875.879 -864.437 -1831.752 0.01569 0.01273
## 2 pitcher_break_z (+) -874.227 -858.970 -1831.890 0.01671 0.01079
## 3 pitch_speed:pitch_usage (-) -870.698 -859.256 -1826.584 0.00035 -0.00265
## 4 pitch_speed (+) -877.840 -862.584 -1835.493 0.02726 0.02140
## 5 pitch_usage (+) -881.134 -862.063 -1840.555 0.04251 0.03383
## 6 pitch_usage:ovr_break (+) -882.750 -859.865 -1843.940 0.05279 0.04131
## 7 ovr_break (+) -882.634 -855.935 -1845.597 0.05810 0.04379
## 8 pitcher_break_z (-) -884.460 -861.575 -1845.642 0.05761 0.04619
## 9 spin_rate (+) -882.608 -855.909 -1845.571 0.05803 0.04371
## 10 ovr_break (-) -874.597 -851.713 -1835.823 0.02946 0.01769
## 11 pitch_speed:ovr_break (+) -882.165 -855.466 -1845.130 0.05678 0.04245
## 12 pitch_usage:ovr_break (-) -879.449 -856.564 -1840.653 0.04341 0.03181
## 13 pitcher_break_x (+) -877.639 -850.940 -1840.627 0.04395 0.02942
## 14 spin_rate (-) -879.454 -856.569 -1840.658 0.04342 0.03183
## 15 pitch_speed:pitcher_break_z (+) -880.623 -853.924 -1843.596 0.05243 0.03803
## ---------------------------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ----------------------------------------------------------------
## R 0.229 RMSE 0.064
## R-Squared 0.052 MSE 0.004
## Adj. R-Squared 0.038 Coef. Var 23.589
## Pred R-Squared 0.019 AIC -880.623
## MAE 0.049 SBC -853.924
## ----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.075 5 0.015 3.641 0.0032
## Residual 1.358 329 0.004
## Total 1.433 334
## -------------------------------------------------------------------
##
## Parameter Estimates
## --------------------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## --------------------------------------------------------------------------------------------------------
## (Intercept) 0.729 0.155 4.705 0.000 0.424 1.034
## pitch_speed -0.004 0.002 -0.179 -2.577 0.010 -0.007 -0.001
## pitch_usage -0.001 0.000 -0.106 -1.946 0.053 -0.001 0.000
## pitcher_break_x 0.007 0.004 0.404 1.778 0.076 -0.001 0.014
## pitch_speed:ovr_break 0.000 0.000 -2.250 -1.904 0.058 -0.001 0.000
## pitch_speed:pitcher_break_z 0.000 0.000 1.935 1.768 0.078 0.000 0.001
## --------------------------------------------------------------------------------------------------------
# New Model with Interactions (p -value < 0.10 threshhold)
lm2 <- lm(xwOBA ~
pitch_speed + pitch_usage +
pitch_speed*ovr_break + pitch_speed*pitcher_break_z,
data = model_data)
# Trimmed Data
model_results <- model_data %>%
select(first_name, last_name,
pitch_speed, pitch_usage, pitcher_break_z, ovr_break, xwOBA)
# Comparing Model Predictions
# lm1 = simple
# lm2 = interactions
model_results <- model_results %>%
mutate(lm1 = predict(lm1, model_results)) %>%
mutate(lm2 = predict(lm2, model_results))
# R and RMSE of Simple Linear Model
with(model_results, cor(xwOBA, lm1))
## [1] 0.2094431
with(model_results, rmse(xwOBA, lm1))
## [1] 0.06394718
# R and RMSE of Interactions Linear Model
with(model_results, cor(xwOBA, lm2))
## [1] 0.2151546
with(model_results, rmse(xwOBA, lm2))
## [1] 0.06386603
model_results %>%
select(xwOBA, lm1, lm2) %>%
pivot_longer(cols = lm1:lm2,
names_to = "model",
values_to = "pred") %>%
mutate(model = str_replace(model, "lm1", "Simple LM"),
model = str_replace(model, "lm2", "Interaction LM")) %>%
ggplot(aes(x = xwOBA, y = pred, color = model)) +
geom_point(shape = 18, size = 1.5, alpha = 0.75) +
geom_smooth(se = FALSE) +
scale_color_manual(values = c("navyblue", "skyblue")) +
theme_classic() +
labs(title = "Linear Models for RHP",
x = "Observed",
y = "Predicted",
color = "Model")
# Model Data (Pitch = Slider, Pitching Hand = Left)
model_data_l <- arsenal %>%
filter(pitch_type == "SL",
pitch_hand == "L") %>%
mutate(ovr_break = sqrt(pitcher_break_x^2 + pitcher_break_z^2))
# Simple Linear Regression
lm_simple_l <- lm(xwOBA ~
pitch_speed + spin_rate + pitcher_break_x + pitcher_break_z +
pitch_usage + ovr_break,
data = model_data_l)
# Overview of all model combinations
model_all_l <- ols_step_all_possible(lm_simple_l)
# Backwards Elimination
lm_simple_l %>% ols_step_backward_p(penter = 0.15)
##
##
## Stepwise Summary
## ------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ------------------------------------------------------------------------------
## 0 Full Model -205.439 -184.518 -491.033 0.04617 -0.01471
## 1 pitch_speed -207.131 -188.825 -492.913 0.04325 -0.00710
## 2 spin_rate -208.977 -193.287 -494.917 0.04180 0.00187
## 3 pitch_usage -209.989 -196.914 -496.149 0.03238 0.00245
## ------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## -----------------------------------------------------------------
## R 0.180 RMSE 0.081
## R-Squared 0.032 MSE 0.007
## Adj. R-Squared 0.002 Coef. Var 29.106
## Pred R-Squared -0.036 AIC -209.989
## MAE 0.057 SBC -196.914
## -----------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## -------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## -------------------------------------------------------------------
## Regression 0.022 3 0.007 1.082 0.3605
## Residual 0.670 97 0.007
## Total 0.692 100
## -------------------------------------------------------------------
##
## Parameter Estimates
## -------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## -------------------------------------------------------------------------------------------
## (Intercept) 0.336 0.068 4.936 0.000 0.201 0.471
## pitcher_break_x -0.013 0.009 -0.653 -1.425 0.157 -0.031 0.005
## pitcher_break_z -0.073 0.046 -4.509 -1.604 0.112 -0.164 0.017
## ovr_break 0.073 0.046 4.835 1.568 0.120 -0.019 0.165
## -------------------------------------------------------------------------------------------
# Stepwise Selection
# lm_simple_l %>% ols_step_both_p(prem = 0.15, pent = 0.15)
# New Model
lm1_l <- lm(xwOBA ~
pitcher_break_x + pitcher_break_z +
ovr_break,
data = model_data_l)
# Model Data (Pitch = Slider, Pitching Hand = Left)
# Interaction Linear Regression
lm_interact_l <- lm(xwOBA ~
pitch_speed + spin_rate + pitcher_break_x + pitcher_break_z + pitch_usage +
ovr_break +
pitch_speed*spin_rate + pitch_speed*pitch_usage + pitch_speed*ovr_break +
pitch_speed*pitcher_break_x + pitch_speed*pitcher_break_z +
spin_rate*ovr_break + spin_rate*pitcher_break_x + spin_rate*pitcher_break_z +
pitch_usage*ovr_break + pitch_usage*spin_rate,
data = model_data_l)
# model_interact_all <- ols_step_all_possible(lm_interact)
# Stepwise Selection
# lm_interact_l %>% ols_step_both_p(pent = 0.15, prem = 0.05)
# Output removes ALL interactions for p < 0.05
# Keeps same as simple LM pitcher_break_z + pitch_speed + pitch_usage
# lm_interact_l %>% ols_step_both_p(pent = 0.20, prem = 0.05)
# New Model with Interactions (p -value < 0.10 threshhold)
lm2_l <- lm(xwOBA ~
spin_rate*pitcher_break_z,
data = model_data_l)
# Trimmed Data
model_results_l <- model_data_l %>%
select(first_name, last_name,
pitch_speed, pitch_usage, pitcher_break_z, ovr_break, xwOBA)
# Comparing Model Predictions
# lm1 = simple
# lm2 = interactions
model_results_l <- model_results_l %>%
mutate(lm1 = predict(lm1, model_results_l)) %>%
mutate(lm2 = predict(lm2, model_results_l))
# R and RMSE of Simple Linear Model
with(model_results_l, cor(xwOBA, lm1))
## [1] 0.04569634
with(model_results_l, rmse(xwOBA, lm1))
## [1] 0.08383695
# R and RMSE of Interactions Linear Model
with(model_results_l, cor(xwOBA, lm2))
## [1] 0.02805347
with(model_results_l, rmse(xwOBA, lm2))
## [1] 0.08418886
model_results_l %>%
select(xwOBA, lm1, lm2) %>%
pivot_longer(cols = lm1:lm2,
names_to = "model",
values_to = "pred") %>%
mutate(model = str_replace(model, "lm1", "Simple LM"),
model = str_replace(model, "lm2", "Interaction LM")) %>%
ggplot(aes(x = xwOBA, y = pred, color = model)) +
geom_point(shape = 18, size = 1.5, alpha = 0.75) +
geom_smooth(se = FALSE) +
scale_color_manual(values = c("navyblue", "skyblue")) +
theme_classic() +
labs(title = "Linear Models for LHP",
x = "Observed",
y = "Predicted",
color = "Model")
# Pitch by Pitch Data (Sliders)
pitches <- pitchers %>%
filter(pitch_type == "SL") %>%
mutate(pfx_x = pfx_x*12,
pfx_z = pfx_z*12,
ovr_break = round(sqrt(pfx_x^2 + pfx_z^2), 3))
# Simple Linear Regression
lm_pitches <- lm(delta_run_exp ~
release_speed + release_spin_rate + pfx_x + pfx_z +
ovr_break + release_extension,
data = pitches)
# Stepwise Selection
# lm_pitches %>% ols_step_both_p(prem = 0.25, pent = 0.15)
# New Model
lm1_pitches <- lm(delta_run_exp ~
release_spin_rate + pfx_x + release_extension,
data = pitches)
# Interaction Linear Regression
lm_interact_pitches <- lm(delta_run_exp ~
release_speed + release_spin_rate + pfx_x + pfx_z +
ovr_break + release_extension +
release_speed*release_spin_rate + release_speed*pfx_x +
release_speed*pfx_z + release_speed*ovr_break +
release_speed*release_extension +
release_spin_rate*pfx_x + release_spin_rate*pfx_z +
release_spin_rate*ovr_break + release_spin_rate*release_extension +
release_extension*pfx_x + release_extension*pfx_z + release_extension*ovr_break,
data = pitches)
# model_interact_all <- ols_step_all_possible(lm_interact)
# Stepwise Selection
lm_interact_pitches %>% ols_step_both_p(pent = 0.15, prem = 0.15)
##
##
## Stepwise Summary
## ---------------------------------------------------------------------------------------------
## Step Variable AIC SBC SBIC R2 Adj. R2
## ---------------------------------------------------------------------------------------------
## 0 Base Model -72.271 -59.891 -10299.956 0.00000 0.00000
## 1 release_spin_rate:pfx_x (+) -73.773 -55.203 -10301.429 0.00097 0.00069
## 2 release_extension (+) -73.562 -48.803 -10301.189 0.00147 0.00091
## 3 ovr_break (+) -73.409 -42.460 -10301.005 0.00198 0.00115
## 4 release_speed (+) -71.418 -34.280 -10298.986 0.00198 0.00087
## 5 ovr_break (-) -71.620 -40.671 -10299.220 0.00148 0.00065
## 6 pfx_x (+) -71.805 -34.667 -10299.372 0.00209 0.00098
## 7 release_extension (-) -72.690 -41.741 -10300.287 0.00178 0.00095
## 8 pfx_z (+) -70.939 -33.800 -10298.508 0.00185 0.00074
## 9 pfx_x (-) -70.480 -39.531 -10298.082 0.00117 0.00033
## 10 release_speed:pfx_z (+) -74.421 -37.282 -10301.980 0.00281 0.00170
## 11 release_spin_rate:pfx_x (-) -75.339 -44.391 -10302.931 0.00251 0.00168
## 12 release_spin_rate (+) -75.115 -37.976 -10302.672 0.00300 0.00190
## 13 pfx_z (-) -71.481 -40.532 -10299.081 0.00144 0.00061
## ---------------------------------------------------------------------------------------------
##
## Final Model Output
## ------------------
##
## Model Summary
## ------------------------------------------------------------------
## R 0.038 RMSE 0.239
## R-Squared 0.001 MSE 0.057
## Adj. R-Squared 0.001 Coef. Var -3503.793
## Pred R-Squared -0.001 AIC -71.481
## MAE 0.118 SBC -40.532
## ------------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
## AIC: Akaike Information Criteria
## SBC: Schwarz Bayesian Criteria
##
## ANOVA
## --------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## --------------------------------------------------------------------
## Regression 0.298 3 0.099 1.736 0.1574
## Residual 206.296 3600 0.057
## Total 206.595 3603
## --------------------------------------------------------------------
##
## Parameter Estimates
## -----------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## -----------------------------------------------------------------------------------------------
## (Intercept) 0.095 0.129 0.738 0.461 -0.158 0.348
## release_speed -0.002 0.002 -0.022 -1.209 0.227 -0.005 0.001
## release_spin_rate 0.000 0.000 0.026 1.325 0.185 0.000 0.000
## release_speed:pfx_z 0.000 0.000 -0.012 -0.583 0.560 0.000 0.000
## -----------------------------------------------------------------------------------------------
lm2_pitches <- lm(delta_run_exp ~
release_spin_rate*pfx_x + release_extension,
data = pitches)
model_results_pitches <- pitches %>%
select(delta_run_exp, release_speed, release_spin_rate, pfx_x, pfx_z,
release_extension) %>%
mutate(lm1 = predict(lm1_pitches, pitches),
lm2 = predict(lm2_pitches, pitches))
# R and RMSE of Simple Linear Model
model_results_pitches %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm1))
## [1] 0.04470764
model_results_pitches %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm1))
## [1] 0.2391844
# R and RMSE of Interaction Linear Model
model_results_pitches %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm2))
## [1] 0.04598308
model_results_pitches %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm2))
## [1] 0.2391706
# Graph
model_results_pitches %>%
select(delta_run_exp, lm1, lm2) %>%
pivot_longer(cols = lm1:lm2,
names_to = "model",
values_to = "pred") %>%
mutate(model = str_replace(model, "lm1", "Simple LM"),
model = str_replace(model, "lm2", "Interaction LM")) %>%
ggplot(aes(x = delta_run_exp, y = pred, color = model)) +
geom_point(shape = 18, size = 1.5, alpha = 0.75) +
geom_smooth(se = FALSE) +
scale_color_manual(values = c("navyblue", "skyblue")) +
theme_classic() +
labs(title = "Linear Models for Pitch-by-Pitch Data",
subtitle = "Predicting Run Expectancy Added",
caption = "Pitchers: Scherzer, Taillon, Keller, Manoah, Gallen, Garcia, Gray",
x = "Observed",
y = "Predicted",
color = "Model")
model_results_pitches_2 <- pitches %>%
select(delta_run_exp, release_speed, release_spin_rate, pfx_x, pfx_z,
release_extension, ID) %>%
mutate(lm1 = predict(lm1_pitches, pitches),
lm2 = predict(lm2_pitches, pitches))
# Graph
model_results_pitches_2 %>%
select(ID,delta_run_exp, lm1, lm2) %>%
pivot_longer(cols = lm1:lm2,
names_to = "model",
values_to = "pred") %>%
mutate(model = str_replace(model, "lm1", "Simple LM"),
model = str_replace(model, "lm2", "Interaction LM")) %>%
ggplot(aes(x = delta_run_exp, y = pred, color = model)) +
geom_point(shape = 18, size = 1.5, alpha = 0.75) +
geom_smooth(se = FALSE) +
scale_color_manual(values = c("navyblue", "skyblue")) +
facet_wrap(~ ID, ncol = 1) +
theme_classic() +
labs(title = "Linear Models for Pitch-by-Pitch Data",
subtitle = "Predicting Run Expectancy Added",
caption = "Pitchers: Scherzer, Taillon, Keller, Manoah, Gallen, Garcia, Gray",
x = "Observed",
y = "Predicted",
color = "Model")
# Correlations
model_results_pitches_2 %>%
filter(ID == "Great") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm1))
## [1] 0.06192874
model_results_pitches_2 %>%
filter(ID == "Decent") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm1))
## [1] 0.01215481
model_results_pitches_2 %>%
filter(ID == "Bad") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(cor(delta_run_exp, lm1))
## [1] 0.03652784
# RMSE
model_results_pitches_2 %>%
filter(ID == "Great") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm1))
## [1] 0.1927567
model_results_pitches_2 %>%
filter(ID == "Decent") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm1))
## [1] 0.2850569
model_results_pitches_2 %>%
filter(ID == "Bad") %>%
filter(!is.na(release_spin_rate),
!is.na(pfx_x),
!is.na(release_extension),
!is.na(delta_run_exp)) %>%
with(rmse(delta_run_exp, lm1))
## [1] 0.2510364
pitchers %>%
# filter(pitch_type == "SL") %>%
ggplot(aes(x = plate_z)) +
geom_histogram(binwidth = 0.15, color = "white")
pitchers %>%
ggplot(aes(x = delta_run_exp)) +
geom_histogram(binwidth = 0.15, color = "white")
pitchers %>%
filter(pitch_type %in% c("FF", "SL", "CH")) %>%
mutate(pitch_dist = sqrt(plate_x^2 + (2.5 - plate_z)^2)) %>%
ggplot(aes(x = pitch_dist, color = ID)) +
geom_density() +
facet_wrap(~ pitch_type,
ncol = 1)
pitchers %>%
filter(pitch_type %in% c("FF", "SL", "CH")) %>%
mutate(pitch_dist = sqrt(plate_x^2 + (2.5 - plate_z)^2)) %>%
ggplot(aes(x = pitch_dist, y = delta_run_exp)) +
geom_point(alpha = 0.15)
# Model?
model <- lm(delta_run_exp ~ dist + speed_change + break_change,
data = pitchers)
preds <- pitchers %>%
mutate(predicted = predict(model, pitchers)) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
acc <- preds %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point(alpha = 0.5) +
geom_smooth()
acc
acc +
facet_wrap(~ pitch_type)
acc +
facet_wrap(~ ID)
data_ff <- pitchers %>%
filter(pitch_type == "FF",
!is.na(break_change)) %>%
filter(pitch_type != "PO")
model_ff <- train(
delta_run_exp ~ dist + speed_change + break_change + release_speed + pfx_x + pfx_z,
data = data_ff,
method = "ranger",
trControl = trainControl(method = "cv", number = 5))
preds_ff <- cbind(data_ff, predict(model_ff)) %>%
as.data.frame() %>%
rename(observed = delta_run_exp,
predicted = "predict(model_ff)") %>%
select(ID, zone, pitch_type, observed, predicted)
preds_ff %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
geom_abline(slope = 1, intercept = 0) +
coord_fixed() +
labs(title = "Fastball RF Model",
caption = paste0("RMSE: ", round(rmse(preds_ff$observed, preds_ff$predicted), 4)))
preds_ff %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9625188
data_ff <- pitchers %>%
filter(pitch_type == "FF",
!is.na(break_change))
model_ff <- ranger(delta_run_exp ~ dist + speed_change + break_change,
data = data_ff, mtry = 2)
preds_ff <- data_ff %>%
mutate(predicted = predict(model_ff, data_ff)$predictions) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
preds_ff %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
labs(title = "Fastball RF Model",
caption = paste0("RMSE: ", round(rmse(preds_ff$observed, preds_ff$predicted), 4)))
preds_ff %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9710876
data_si <- pitchers %>%
filter(pitch_type == "SI",
!is.na(break_change))
model_si <- ranger(delta_run_exp ~ dist + speed_change + break_change,
data = data_si, mtry = 2)
preds_si <- data_si %>%
mutate(predicted = predict(model_si, data_si)$predictions) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
preds_si %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
labs(title = "Sinker RF Model",
caption = paste0("RMSE: ", round(rmse(preds_si$observed, preds_si$predicted), 4)))
preds_si %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9591401
data_ch <- pitchers %>%
filter(pitch_type == "CH",
!is.na(break_change))
model_ch <- ranger(delta_run_exp ~ dist + speed_change + break_change,
data = data_ch, mtry = 2)
preds_ch <- data_ch %>%
mutate(predicted = predict(model_ch, data_ch)$predictions) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
preds_ch %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
labs(title = "Change-Up RF Model",
caption = paste0("RMSE: ", round(rmse(preds_ch$observed, preds_ch$predicted), 4)))
preds_ch %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9661838
data_sl <- pitchers %>%
filter(pitch_type == "SL",
!is.na(break_change))
model_sl <- ranger(delta_run_exp ~ dist + speed_change + break_change,
data = data_sl, mtry = 2)
preds_sl <- data_sl %>%
mutate(predicted = predict(model_sl, data_sl)$predictions) %>%
rename(observed = delta_run_exp) %>%
select(ID, zone, pitch_type, observed, predicted) %>%
filter(pitch_type != "PO")
preds_sl %>%
ggplot(aes(x = observed, y = predicted)) +
geom_point() +
geom_smooth() +
labs(title = "Slider RF Model",
caption = paste0("RMSE: ", round(rmse(preds_sl$observed, preds_sl$predicted), 4)))
preds_sl %>%
filter(!is.na(observed),
!is.na(predicted)) %>%
with(cor(observed, predicted))
## [1] 0.9677164
#
# Actmodel <- train(delta_run_exp ~ dist + speed_change + break_change,
# data = data_sl, method = "ranger",
# trControl = trainControl(method = "cv", number = 10, verboseIter = TRUE), preProcess = c("knnImpute"))
# plot(Actmodel$finalModel$forest)
# Slider Logistic Model
whiff_sl <- whiff %>%
filter(pitch_type == "SL") %>%
mutate(whiff = str_replace(whiff, "TRUE", "1"),
whiff = str_replace(whiff, "FALSE", "0"),
whiff = as.numeric(whiff))
# Original Model
model1 <- glm(whiff ~ release_speed + spin_axis + pfx_x + pfx_z + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + dist,
data = whiff_sl, family = binomial)
# Reduced Model
model1 <- glm(whiff ~ release_speed + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + dist,
data = whiff_sl, family = binomial)
summary(model1)
##
## Call:
## glm(formula = whiff ~ release_speed + plate_x + plate_z + release_spin_rate +
## speed_change + break_change + pfx_total + dist, family = binomial,
## data = whiff_sl)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 9.0550541 3.0643314 2.955 0.00313 **
## release_speed -0.0924999 0.0353807 -2.614 0.00894 **
## plate_x 0.5454560 0.0870095 6.269 3.64e-10 ***
## plate_z -0.2438012 0.0988763 -2.466 0.01367 *
## release_spin_rate -0.0006138 0.0002186 -2.808 0.00498 **
## speed_change 0.1267177 0.0611358 2.073 0.03820 *
## break_change 1.0446186 0.3722565 2.806 0.00501 **
## pfx_total -0.9888782 0.2453062 -4.031 5.55e-05 ***
## dist -0.5004963 0.1216086 -4.116 3.86e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3093.3 on 3604 degrees of freedom
## Residual deviance: 3003.3 on 3596 degrees of freedom
## (6 observations deleted due to missingness)
## AIC: 3021.3
##
## Number of Fisher Scoring iterations: 5
preds <- whiff_sl %>%
mutate(prediction_log = predict(model1, whiff_sl),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
preds %>%
ggplot(aes(x = as.character(whiff), y = prediction)) +
geom_boxplot() +
geom_jitter(alpha = 0.1, width = 0.1, height = 0)
preds %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = `mean(whiff)`)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Whiff proportion by predicted whiff value",
subtitle = "Whiff predictions have a 1% bin width")
preds %>%
arrange(desc(prediction)) %>%
head(10)
## # A tibble: 10 × 85
## pitch_type game_date release_speed release_pos_x release_pos_z player_name
## <chr> <date> <dbl> <dbl> <dbl> <chr>
## 1 SL 2022-06-03 85.5 -2.02 5.32 Gray, Josiah
## 2 SL 2022-04-13 83.4 -1.79 5.18 Gray, Josiah
## 3 SL 2022-04-13 84.7 -1.75 5.25 Gray, Josiah
## 4 SL 2022-04-13 83.3 -1.71 5.09 Gray, Josiah
## 5 SL 2022-04-08 84.5 -1.54 5.17 Gray, Josiah
## 6 SL 2022-04-13 84.1 -1.74 5.19 Gray, Josiah
## 7 SL 2022-04-08 83.7 -1.61 5.2 Gray, Josiah
## 8 SL 2022-04-08 86.2 -1.84 5.4 Gray, Josiah
## 9 SL 2022-04-26 85.7 -1.79 5.28 Gray, Josiah
## 10 SL 2022-04-08 85.1 -1.69 5.19 Gray, Josiah
## # ℹ 79 more variables: batter <dbl>, pitcher...8 <dbl>, events <chr>,
## # description <chr>, zone <dbl>, des <chr>, game_type <chr>, stand <chr>,
## # p_throws <chr>, home_team <chr>, away_team <chr>, type <chr>,
## # hit_location <dbl>, bb_type <chr>, balls <dbl>, strikes <dbl>,
## # game_year <dbl>, pfx_x <dbl>, pfx_z <dbl>, plate_x <dbl>, plate_z <dbl>,
## # on_3b <dbl>, on_2b <dbl>, on_1b <dbl>, outs_when_up <dbl>, inning <dbl>,
## # inning_topbot <chr>, hc_x <dbl>, hc_y <dbl>, tfs_deprecated <lgl>, …
whiff %>%
mutate(count = paste0(balls, "-", strikes)) %>%
filter(pitch_type == "SL") %>%
ggplot(aes(y = whiff, x = pfx_z*12)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~count) +
labs(title = "Whiff vs. Non-Whiff by Vertical Movement",
x = "Induced Vertical Movement (in.)",
y = "Outcome") +
NULL
# Sliders
whiff %>%
filter(pitch_type =="SL") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(stand), rows = vars(whiff)) +
theme_bw()
# Fastballs
whiff %>%
filter(pitch_type =="FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(stand), rows = vars(whiff)) +
theme_bw()
# Change-Ups
whiff %>%
filter(pitch_type =="CH") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(stand), rows = vars(whiff)) +
theme_bw()
whiff %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(stand == "R",
pitch_type == "SL",
prev_pitch %in% c("FF", "CH", "SL", "CU"),
player_name == "Scherzer, Max") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(prev_pitch), rows = vars(whiff)) +
theme_bw()
whiff %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(stand == "R",
pitch_type == "FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(player_name), rows = vars(whiff)) +
theme_bw()
zoned <- whiff %>%
mutate(loc_x = round(plate_x*3, 0),
loc_y = round(plate_z*3, 0))
zoned %>%
filter(pitch_type == "FF",
plate_z > 0 & plate_z < 6,
plate_x > -1.5 & plate_x < 1.5) %>%
summarize(whiff_perc = mean(whiff == "TRUE"),
pitches = n(),
.by = c(loc_x, loc_y, player_name)) %>%
filter(pitches >= 10) %>%
ggplot(aes(x = -loc_x, y = loc_y, fill = whiff_perc)) +
geom_tile() +
scale_fill_gradient(low = "gray", high = "red") +
facet_wrap(~ player_name) +
coord_fixed() +
theme_bw()
# Slider Logistic Model
whiff_sl2 <- whiff_l %>%
filter(pitch_type == "SL") %>%
mutate(whiff = str_replace(whiff, "TRUE", "1"),
whiff = str_replace(whiff, "FALSE", "0"),
whiff = as.numeric(whiff))
# Original Model
model2 <- glm(whiff ~ pitch_speed + spin_axis + pfx_x + pfx_z + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + distance,
data = whiff_sl2, family = binomial)
# Reduced Model
model2 <- glm(whiff ~ pitch_speed + plate_x + plate_z +
release_spin_rate + speed_change + break_change + pfx_total + distance,
data = whiff_sl2, family = binomial)
summary(model2)
##
## Call:
## glm(formula = whiff ~ pitch_speed + plate_x + plate_z + release_spin_rate +
## speed_change + break_change + pfx_total + distance, family = binomial,
## data = whiff_sl2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.0425752 0.9277708 -5.435 5.47e-08 ***
## pitch_speed 0.0501462 0.0103594 4.841 1.29e-06 ***
## plate_x -0.3698816 0.0469998 -7.870 3.55e-15 ***
## plate_z -0.4913540 0.0488865 -10.051 < 2e-16 ***
## release_spin_rate 0.0002894 0.0001245 2.325 0.0201 *
## speed_change 0.0038569 0.0228735 0.169 0.8661
## break_change 0.2291186 0.2001940 1.144 0.2524
## pfx_total -0.1649991 0.1196077 -1.380 0.1677
## distance -0.7165732 0.0642529 -11.152 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11359 on 13558 degrees of freedom
## Residual deviance: 11095 on 13550 degrees of freedom
## (98 observations deleted due to missingness)
## AIC: 11113
##
## Number of Fisher Scoring iterations: 5
preds2 <- whiff_sl2 %>%
mutate(prediction_log = predict(model2, whiff_sl2),
prediction = 1 / (1 + exp(-prediction_log)),
rounded_pred = case_when(
prediction >= 0.5 ~ 1,
prediction < 0.5 ~ 0
)) %>%
filter(!is.na(prediction))
preds2 %>%
ggplot(aes(x = as.character(whiff), y = prediction)) +
geom_boxplot() +
geom_jitter(alpha = 0.1, width = 0.1, height = 0)
preds2 %>%
mutate(prediction = round(prediction, 2)) %>%
group_by(prediction) %>%
summarize(mean(whiff)) %>%
as.data.frame() %>%
ggplot(aes(x = prediction, y = `mean(whiff)`)) +
geom_point() +
geom_smooth(se = FALSE) +
labs(y = "observed whiff proportion",
x = "projected % whiff chance",
title = "Whiff proportion by predicted whiff value",
subtitle = "Whiff predictions have a 1% bin width")
whiff_l %>%
mutate(count = paste0(balls, "-", strikes)) %>%
filter(pitch_type == "SL") %>%
ggplot(aes(y = whiff, x = pfx_z*12)) +
geom_violin() +
geom_boxplot(alpha = 0.5, width = 0.5) +
facet_wrap(~count) +
labs(title = "Whiff vs. Non-Whiff by Vertical Movement",
x = "Induced Vertical Movement (in.)",
y = "Outcome") +
NULL
# Sliders
whiff_l %>%
filter(pitch_type =="SL") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
# Fastballs
whiff_l %>%
filter(pitch_type =="FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
# Change-Ups
whiff_l %>%
filter(pitch_type =="CH") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
# geom_point(alpha = 0.2) +
coord_fixed() +
facet_grid(cols = vars(hitter), rows = vars(whiff)) +
theme_bw()
whiff_l %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(hitter == "R",
pitch_type == "SL",
prev_pitch %in% c("FF", "CH", "SL", "CU"),
player_name == "Fried, Max") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(prev_pitch), rows = vars(whiff)) +
theme_bw()
whiff_l %>%
arrange(game_date, player_name, at_bat_number, pitch_number) %>%
mutate(prev_pitch = lag(pitch_type, n = 1, default = NA)) %>%
filter(hitter == "R",
pitch_type == "FF") %>%
ggplot(aes(x = -plate_x, y = plate_z)) +
geom_density_2d_filled(contour_var = "density", alpha = 0.5) +
geom_zone() +
coord_fixed() +
facet_grid(cols = vars(player_name), rows = vars(whiff)) +
theme_bw()
zoned2 <- whiff_l %>%
mutate(loc_x = round(plate_x*3, 0),
loc_y = round(plate_z*3, 0))
zoned2 %>%
filter(pitch_type == "FF",
plate_z > 0 & plate_z < 6,
plate_x > -1.5 & plate_x < 1.5) %>%
summarize(whiff_perc = mean(whiff == "TRUE"),
pitches = n(),
.by = c(loc_x, loc_y, player_name)) %>%
filter(pitches >= 10) %>%
ggplot(aes(x = -loc_x, y = loc_y, fill = whiff_perc)) +
geom_tile() +
scale_fill_gradient(low = "gray", high = "red") +
facet_wrap(~ player_name) +
coord_fixed() +
theme_bw()